Code
using Pkg; Pkg.activate("dev")This is a supplementary appendix to the research paper Endogenous Macrodynamics in Algorithmic Recourse. It contains all of the experimental results, including those not highlighted in the actual paper. It also contains additional information about the proposed counterfactual generators.
This notebook was used to run the experiments for the synthetic datasets. In the following we first run the experiments and then generate visualizations and tables.
using Pkg; Pkg.activate("dev")include("dev/utils.jl")
using AlgorithmicRecourseDynamics
using CounterfactualExplanations, Flux, Plots, PlotThemes, Random, LaplaceRedux, LinearAlgebra, Images
theme(:wong)
output_path = output_dir("synthetic")
www_path = www_dir("synthetic");max_obs = 1000
catalogue = AlgorithmicRecourseDynamics.Data.load_synthetic(max_obs)
choices = [
:linearly_separable,
:overlapping,
:circles,
:moons,
]
data_sets = filter(p -> p[1] in choices, catalogue)models = [
:LogisticRegression,
:FluxModel,
:FluxEnsemble,
]
generators = Dict(
:Greedy=>GreedyGenerator(),
:Generic=>GenericGenerator(),
:REVISE=>REVISEGenerator(),
:DICE=>DiCEGenerator(),
)experiments = set_up_experiments(data_sets,models,generators)using AlgorithmicRecourseDynamics.Models: model_evaluation
plts = []
for (exp_name, exp_) in experiments
for (M_name, M) in exp_.models
score = round(model_evaluation(M, exp_.test_data),digits=2)
plt = plot(M, exp_.test_data, title="$exp_name;\n $M_name ($score)")
# Errors:
ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y))
x_wrongly_labelled = exp_.test_data.X[:,ids]
scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="")
plts = vcat(plts..., plt)
end
end
plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300))
savefig(plt, joinpath(www_path,"models_test_before.png"))Figure 1 shows the test data before running the experiment.
load(joinpath(www_path,"models_test_before.png"))using AlgorithmicRecourseDynamics.Models: model_evaluation
plts = []
for (exp_name, exp_) in experiments
for (M_name, M) in exp_.models
score = round(model_evaluation(M, exp_.train_data),digits=2)
plt = plot(M, exp_.train_data, title="$exp_name;\n $M_name ($score)")
# Errors:
ids = findall(vec(round.(probs(M, exp_.train_data.X)) .!= exp_.train_data.y))
x_wrongly_labelled = exp_.train_data.X[:,ids]
scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="")
plts = vcat(plts..., plt)
end
end
plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300))
savefig(plt, joinpath(www_path,"models_train_before.png"))Figure 2 shows the training data before running the experiment.
load(joinpath(www_path,"models_train_before.png"))n_evals = 5
n_rounds = 50
evaluate_every = Int(round(n_rounds/n_evals))
n_folds = 5
n_bootstrap = 1
T = 100
using Serialization
results = run_experiments(
experiments;
save_path=output_path,evaluate_every=evaluate_every,n_rounds=n_rounds, n_folds=n_folds, n_bootstrap=n_bootstrap, T=T
)
Serialization.serialize(joinpath(output_path,"results.jls"),results)using AlgorithmicRecourseDynamics.Models: model_evaluation
plot_dict = Dict(key => Dict() for (key,val) in results)
fold = 1
for (name, res) in results
exp_ = res.experiment
plot_dict[name] = Dict(key => [] for (key,val) in exp_.generators)
rec_sys = exp_.recourse_systems[fold]
sys_ids = collect(exp_.system_identifiers)
M = length(rec_sys)
for m in 1:M
model_name, generator_name = sys_ids[m]
M = rec_sys[m].model
score = round(model_evaluation(M, exp_.test_data),digits=2)
plt = plot(M, exp_.test_data, title="$name;\n $model_name ($score)")
# Errors:
ids = findall(vec(round.(probs(M, exp_.test_data.X)) .!= exp_.test_data.y))
x_wrongly_labelled = exp_.test_data.X[:,ids]
scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="")
plot_dict[name][generator_name] = vcat(plot_dict[name][generator_name], plt)
end
end
plot_dict = Dict(key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for (key, value) in generators)
for (name, plts) in plot_dict
plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300))
savefig(plt, joinpath(www_path,"models_test_after_$(name).png"))
endFigure 3 shows the test data after running the experiment.
img_files = readdir(www_path)[contains.(readdir(www_path),"models_test_after")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) DICE
(b) Generic
(c) Greedy
(d) Latent
using AlgorithmicRecourseDynamics.Models: model_evaluation
plot_dict = Dict(key => Dict() for (key,val) in results)
fold = 1
for (name, res) in results
exp_ = res.experiment
plot_dict[name] = Dict(key => [] for (key,val) in exp_.generators)
rec_sys = exp_.recourse_systems[fold]
sys_ids = collect(exp_.system_identifiers)
M = length(rec_sys)
for m in 1:M
model_name, generator_name = sys_ids[m]
M = rec_sys[m].model
data = rec_sys[m].data
score = round(model_evaluation(M, data),digits=2)
plt = plot(M, data, title="$name;\n $model_name ($score)")
# Errors:
ids = findall(vec(round.(probs(M, data.X)) .!= data.y))
x_wrongly_labelled = data.X[:,ids]
scatter!(plt, x_wrongly_labelled[1,:], x_wrongly_labelled[2,:], ms=7.5, color=:red, label="")
plot_dict[name][generator_name] = vcat(plot_dict[name][generator_name], plt)
end
end
plot_dict = Dict(key => reduce(vcat, [plots[key] for plots in values(plot_dict)]) for (key, value) in generators)
for (name, plts) in plot_dict
plt = plot(plts..., layout=(length(choices),length(models)),size=(length(choices)*300,length(models)*300))
savefig(plt, joinpath(www_path,"models_train_after_$(name).png"))
endFigure 4 shows the training data after running the experiment.
img_files = readdir(www_path)[contains.(readdir(www_path),"models_train_after")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) DICE
(b) Generic
(c) Greedy
(d) Latent
using Serialization
results = Serialization.deserialize(joinpath(output_path,"results.jls"));using Images
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
plt = plot(res)
Images.save(joinpath(www_path, "line_chart_$(data_name).png"), plt)
line_charts[data_name] = plt
plt = plot(res,maximum(res.output.n))
Images.save(joinpath(www_path, "errorbar_chart_$(data_name).png"), plt)
errorbar_charts[data_name] = plt
endFigure 5 shows the evolution of the evaluation metrics over the course of the experiment.
img_files = readdir(www_path)[contains.(readdir(www_path),"line_chart")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) Circles
(b) Linearly Separable
(c) Moons
(d) Overlapping
Figure 6 shows the evaluation metrics at the end of the experiments.
img_files = readdir(www_path)[contains.(readdir(www_path),"errorbar_chart")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) Circles
(b) Linearly Separable
(c) Moons
(d) Overlapping
n_bootstrap = 1000
using AlgorithmicRecourseDynamics.Evaluation: evaluate_system
using DataFrames
df = DataFrame()
for (key, val) in results
n_folds = length(val.experiment.recourse_systems)
for fold in 1:n_folds
for i in length(val.experiment.system_identifiers)
rec_sys = val.experiment.recourse_systems[fold][i]
model_name, gen_name = collect(val.experiment.system_identifiers)[i]
df_ = evaluate_system(rec_sys, val.experiment; n=n_bootstrap)
df_.model .= model_name
df_.generator .= gen_name
df_.fold .= fold
df = vcat(df, df_)
end
end
end
df = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df)
using RCall
save_path = joinpath(output_path, "bootstrap.csv")
using CSV
CSV.write(save_path)Figure 7 shows the chart that went into the paper.
using DataFrames, Statistics
df = results[:overlapping].output
df = df[[x ∈ [50] for x in df.n],:]
gdf = groupby(df, [:generator, :model, :n, :name, :scope])
df_plot = combine(gdf, :value => (x -> [(mean(x),mean(x)+std(x),mean(x)-std(x))]) => [:mean, :ymax, :ymin])
df_plot = df_plot[[name in [:decisiveness, :disagreement, :mmd, :mmd_grid, :model_performance] for name in df_plot.name],:]
df_plot = df_plot[.!(df_plot.name.==:mmd .&& df_plot.scope.==:model),:]
df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot)
transform!(df_plot, :name => (X -> [x=="decisiveness" ? "Decisiveness" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="disagreement" ? "Disagreement" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="mmd" ? "MMD (domain)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="mmd_grid" ? "MMD (model)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="model_performance" ? "Performance" : x for x in X]) => :name)
transform!(df_plot, :generator => (X -> [x=="REVISE" ? "Latent" : x for x in X]) => :generator)
transform!(df_plot, :model => (X -> [x=="FluxEnsemble" ? "Deep Ensemble" : x for x in X]) => :model)
transform!(df_plot, :model => (X -> [x=="FluxModel" ? "MLP" : x for x in X]) => :model)
transform!(df_plot, :model => (X -> [x=="LogisticRegression" ? "Linear" : x for x in X]) => :model)
ncol = length(unique(df_plot.model))
nrow = length(unique(df_plot.name))
using RCall
scale_ = 1.5
R"""
library(data.table)
df_plot <- data.table($df_plot)
name_order <- c(
"MMD (domain)",
"MMD (model)",
"Performance",
"Disagreement",
"Decisiveness"
)
df_plot[,name:=factor(name, levels=name_order)]
model_order <- c("Linear", "MLP", "Deep Ensemble")
df_plot[,model:=factor(model, levels=model_order)]
library(ggplot2)
plt <- ggplot(df_plot) +
geom_bar(aes(x=n, y=mean, fill=generator), stat="identity", alpha=0.5, position="dodge") +
geom_pointrange(aes(x=n, y=mean, ymin=ymin, ymax=ymax, colour=generator), alpha=0.9, position=position_dodge(width=c(0.9,0.9)), size=0.5) +
facet_grid(
rows = vars(name),
cols = vars(model),
scales = "free_y"
) +
labs(y = "Value") +
scale_fill_discrete(name="Generator:") +
scale_colour_discrete(name="Generator:") +
theme(
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="bottom"
)
temp_path <- file.path(tempdir(), "plot.png")
ggsave(temp_path,width=$ncol * $scale_,height=$nrow * $scale_ * 0.75)
"""
img = Images.load(rcopy(R"temp_path"))
Images.save(joinpath(www_path,"paper_synthetic_results.png"), img)Images.load(joinpath(www_path,"paper_synthetic_results.png"))using Pkg; Pkg.activate("dev")include("dev/utils.jl")
using AlgorithmicRecourseDynamics
using CounterfactualExplanations, Flux, Plots, PlotThemes, Random, LaplaceRedux, LinearAlgebra
theme(:wong)
output_path = output_dir("real_world")
www_path = www_dir("real_world");max_obs = 2500
data_sets = AlgorithmicRecourseDynamics.Data.load_real_world(max_obs)
choices = [
:cal_housing,
:credit_default,
:gmsc,
]
data_sets = filter(p -> p[1] in choices, data_sets)using CounterfactualExplanations.DataPreprocessing: unpack
bs = 50
function data_loader(data::CounterfactualData)
X, y = unpack(data)
data = Flux.DataLoader((X,y),batchsize=bs)
return data
end
model_params = (batch_norm=false,n_hidden=32,n_layers=3,dropout=true,p_dropout=0.25)models = [
:LogisticRegression,
:FluxModel,
:FluxEnsemble
]
generators = Dict(
:Greedy=>GreedyGenerator(),
:Generic=>GenericGenerator(),
:REVISE=>REVISEGenerator(),
:DICE=>DiCEGenerator(),
)experiments = set_up_experiments(
data_sets,models,generators;
pre_train_models=100, model_params=model_params,
data_loader=data_loader
)n_evals = 5
n_rounds = 50
evaluate_every = Int(round(n_rounds/n_evals))
n_folds = 5
n_bootstrap = 1
n_samples = 10000
T = 250
generative_model_params = (epochs=250, latent_dim=8)
using Serialization
results = run_experiments(
experiments;
save_path=output_path,evaluate_every=evaluate_every,n_rounds=n_rounds, n_folds=n_folds, n_bootstrap=n_bootstrap, T=T, n_samples=n_samples,
generative_model_params=generative_model_params
)
Serialization.serialize(joinpath(output_path,"results.jls"),results)using Serialization
results = Serialization.deserialize(joinpath(output_path,"results.jls"))using Images
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
plt = plot(res)
Images.save(joinpath(www_path, "line_chart_$(data_name).png"), plt)
line_charts[data_name] = plt
plt = plot(res,maximum(res.output.n))
Images.save(joinpath(www_path, "errorbar_chart_$(data_name).png"), plt)
errorbar_charts[data_name] = plt
endFigure 8 shows the evolution of the evaluation metrics over the course of the experiment.
img_files = readdir(www_path)[contains.(readdir(www_path),"line_chart")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) California Housing
(b) Credit Default
(c) GMSC
Figure 9 shows the evaluation metrics at the end of the experiments.
img_files = readdir(www_path)[contains.(readdir(www_path),"errorbar_chart")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) California Housing
(b) Credit Default
(c) GMSC
n_bootstrap = 1000
using AlgorithmicRecourseDynamics.Evaluation: evaluate_system
using DataFrames
df = DataFrame()
for (key, val) in results
n_folds = length(val.experiment.recourse_systems)
for fold in 1:n_folds
for i in length(val.experiment.system_identifiers)
rec_sys = val.experiment.recourse_systems[fold][i]
model_name, gen_name = collect(val.experiment.system_identifiers)[i]
df_ = evaluate_system(rec_sys, val.experiment; n=n_bootstrap)
df_.model .= model_name
df_.generator .= gen_name
df_.fold .= fold
df = vcat(df, df_)
end
end
end
df = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df)
using RCall
save_path = joinpath(output_path, "bootstrap.csv")
using CSV
CSV.write(save_path)Figure 10 shows the chart that went into the paper.
using DataFrames, Statistics
model_ = :FluxEnsemble
df = DataFrame()
for (key, val) in results
df_ = deepcopy(val.output)
df_.dataset .= key
df = vcat(df,df_)
end
df = df[df.n .== maximum(df.n),:]
df = df[df.model .== model_,:]
filter!(:value => x -> !any(f -> f(x), (ismissing, isnothing, isnan)), df)
gdf = groupby(df, [:generator, :dataset, :n, :name, :scope])
df_plot = combine(gdf, :value => (x -> [(mean(x),mean(x)+std(x),mean(x)-std(x))]) => [:mean, :ymax, :ymin])
df_plot = df_plot[[name in [:mmd, :model_performance] for name in df_plot.name],:]
df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot)
df_plot.name .= [r[:name] == "mmd" ? "$(r[:name])_$(r[:scope])" : r[:name] for r in eachrow(df_plot)]
transform!(df_plot, :dataset => (X -> [x=="cal_housing" ? "California Housing" : x for x in X]) => :dataset)
transform!(df_plot, :dataset => (X -> [x=="credit_default" ? "Credit Default" : x for x in X]) => :dataset)
transform!(df_plot, :dataset => (X -> [x=="gmsc" ? "GMSC" : x for x in X]) => :dataset)
transform!(df_plot, :name => (X -> [x=="mmd_domain" ? "MMD (domain)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="mmd_model" ? "MMD (model)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="model_performance" ? "Performance" : x for x in X]) => :name)
transform!(df_plot, :generator => (X -> [x=="REVISE" ? "Latent" : x for x in X]) => :generator)
ncol = length(unique(df_plot.dataset))
nrow = length(unique(df_plot.name))
using RCall
scale_ = 1.75
R"""
library(ggplot2)
plt <- ggplot($df_plot) +
geom_bar(aes(x=n, y=mean, fill=generator), stat="identity", alpha=0.5, position="dodge") +
geom_pointrange( aes(x=n, y=mean, ymin=ymin, ymax=ymax, colour=generator), alpha=0.9, position=position_dodge(width=0.9), size=0.5) +
facet_grid(
rows = vars(name),
cols = vars(dataset),
scales = "free_y"
) +
labs(y = "Value") +
scale_fill_discrete(name="Generator:") +
scale_colour_discrete(name="Generator:") +
theme(
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="bottom"
)
temp_path <- file.path(tempdir(), "plot.png")
ggsave(temp_path,width=$ncol * $scale_,height=$nrow * $scale_ * 0.8)
"""
img = Images.load(rcopy(R"temp_path"))
Images.save(joinpath(www_path,"paper_real_world_results.png"), img)Images.load(joinpath(www_path,"paper_real_world_results.png"))using Pkg; Pkg.activate("dev")include("dev/utils.jl")
using AlgorithmicRecourseDynamics
using CounterfactualExplanations, Flux, Plots, PlotThemes, Random, LaplaceRedux, LinearAlgebra, Images
theme(:wong)
output_path = output_dir("mitigation_strategies")
www_path = www_dir("mitigation_strategies");models = [
:LogisticRegression,
:FluxModel,
:FluxEnsemble,
]
generators = Dict(
:Generic=>GenericGenerator(decision_threshold=0.5),
:Latent=>REVISEGenerator(),
:Generic_conservative=>GenericGenerator(decision_threshold=0.9),
:Gravitational=>GravitationalGenerator(),
:ClapROAR=>ClapROARGenerator()
)max_obs = 1000
catalogue = AlgorithmicRecourseDynamics.Data.load_synthetic(max_obs)
choices = [
:linearly_separable,
:overlapping,
:circles,
:moons,
]
data_sets = filter(p -> p[1] in choices, catalogue)experiments = set_up_experiments(data_sets,models,generators)n_evals = 5
n_rounds = 50
evaluate_every = Int(round(n_rounds/n_evals))
n_folds = 5
n_bootstrap = 1
T = 100
using Serialization
results = run_experiments(
experiments;
save_path=output_path,evaluate_every=evaluate_every,n_rounds=n_rounds, n_folds=n_folds, n_bootstrap=n_bootstrap, T=T
)
Serialization.serialize(joinpath(output_path,"results_synthetic.jls"),results)using Serialization
results = Serialization.deserialize(joinpath(output_path,"results_synthetic.jls"))using Images
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
plt = plot(res)
Images.save(joinpath(www_path, "line_chart_$(data_name).png"), plt)
line_charts[data_name] = plt
plt = plot(res,maximum(res.output.n))
Images.save(joinpath(www_path, "errorbar_chart_$(data_name).png"), plt)
errorbar_charts[data_name] = plt
endFigure 11 shows the evolution of the evaluation metrics over the course of the experiment.
img_files = readdir(www_path)[contains.(readdir(www_path),"line_chart") .&& .!contains.(readdir(www_path),"latent")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) California Housing
(b) Circles
(c) Credit Default
(d) GMSC
(e) Linearly Separable
(f) Moons
(g) Overlapping
Figure 12 shows the evaluation metrics at the end of the experiments.
img_files = readdir(www_path)[contains.(readdir(www_path),"errorbar_chart") .&& .!contains.(readdir(www_path),"latent")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) California Housing
(b) Circles
(c) Credit Default
(d) GMSC
(e) Linearly Separable
(f) Moons
(g) Overlapping
n_bootstrap = 1000
using AlgorithmicRecourseDynamics.Evaluation: evaluate_system
using DataFrames
df = DataFrame()
for (key, val) in results
n_folds = length(val.experiment.recourse_systems)
for fold in 1:n_folds
for i in length(val.experiment.system_identifiers)
rec_sys = val.experiment.recourse_systems[fold][i]
model_name, gen_name = collect(val.experiment.system_identifiers)[i]
df_ = evaluate_system(rec_sys, val.experiment; n=n_bootstrap)
df_.model .= model_name
df_.generator .= gen_name
df_.fold .= fold
df = vcat(df, df_)
end
end
end
df = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df)
using RCall
save_path = joinpath(output_path, "bootstrap_synthetic.csv")
using CSV
CSV.write(save_path)Figure 13 shows the chart that went into the paper.
using DataFrames, Statistics
df = results[:overlapping].output
df = df[df.n .== maximum(df.n),:]
gdf = groupby(df, [:generator, :model, :n, :name, :scope])
df_plot = combine(gdf, :value => (x -> [(mean(x),mean(x)+std(x),mean(x)-std(x))]) => [:mean, :ymax, :ymin])
df_plot = df_plot[[name in [:mmd, :mmd_grid, :model_performance] for name in df_plot.name],:]
df_plot = df_plot[.!(df_plot.name.==:mmd .&& df_plot.scope.==:model),:]
df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot)
transform!(df_plot, :name => (X -> [x=="mmd" ? "MMD (domain)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="mmd_grid" ? "MMD (model)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="model_performance" ? "Performance" : x for x in X]) => :name)
transform!(df_plot, :generator => (X -> [x=="Generic" ? "Generic (γ=0.5)" : x for x in X]) => :generator)
transform!(df_plot, :generator => (X -> [x=="Generic_conservative" ? "Generic (γ=0.9)" : x for x in X]) => :generator)
transform!(df_plot, :model => (X -> [x=="FluxEnsemble" ? "Deep Ensemble" : x for x in X]) => :model)
transform!(df_plot, :model => (X -> [x=="FluxModel" ? "MLP" : x for x in X]) => :model)
transform!(df_plot, :model => (X -> [x=="LogisticRegression" ? "Linear" : x for x in X]) => :model)
ncol = length(unique(df_plot.model))
nrow = length(unique(df_plot.name))
using RCall
scale_ = 2.0
R"""
library(data.table)
df_plot <- data.table($df_plot)
model_order <- c("Linear", "MLP", "Deep Ensemble")
df_plot[,model:=factor(model, levels=model_order)]
library(ggplot2)
plt <- ggplot($df_plot) +
geom_bar(aes(x=n, y=mean, fill=generator), stat="identity", alpha=0.5, position="dodge") +
geom_pointrange( aes(x=n, y=mean, ymin=ymin, ymax=ymax, colour=generator), alpha=0.9, position=position_dodge(width=0.9), size=0.5) +
facet_grid(
rows = vars(name),
cols = vars(model),
scales = "free_y"
) +
labs(y = "Value") +
scale_fill_discrete(name="Generator:") +
scale_colour_discrete(name="Generator:") +
theme(
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="bottom"
) +
guides(fill=guide_legend(ncol=3))
temp_path <- file.path(tempdir(), "plot.png")
ggsave(temp_path,width=$ncol * $scale_,height=$nrow * $scale_ * 0.8)
"""
img = Images.load(rcopy(R"temp_path"))
Images.save(joinpath(www_path,"paper_synthetic_results.png"), img)Images.load(joinpath(www_path,"paper_synthetic_results.png"))generators = Dict(
:Latent=>GenericGenerator(decision_threshold=0.5),
:Latent_conservative=>GenericGenerator(decision_threshold=0.9),
:Gravitational=>GravitationalGenerator(),
:ClapROAR=>ClapROARGenerator()
)experiments = set_up_experiments(data_sets,models,generators)n_evals = 5
n_rounds = 50
evaluate_every = Int(round(n_rounds/n_evals))
n_folds = 5
n_bootstrap = 1
T = 100
using Serialization
results = run_experiments(
experiments;
save_path=output_path,evaluate_every=evaluate_every,n_rounds=n_rounds, n_folds=n_folds, n_bootstrap=n_bootstrap, T=T
)
Serialization.serialize(joinpath(output_path,"results_synthetic_latent.jls"),results)using Serialization
results = Serialization.deserialize(joinpath(output_path,"results_synthetic_latent.jls"))using Images
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
plt = plot(res)
Images.save(joinpath(www_path, "line_chart_latent_$(data_name).png"), plt)
line_charts[data_name] = plt
plt = plot(res,maximum(res.output.n))
Images.save(joinpath(www_path, "errorbar_chart_latent_$(data_name).png"), plt)
errorbar_charts[data_name] = plt
endFigure 14 shows the evolution of the evaluation metrics over the course of the experiment.
img_files = readdir(www_path)[contains.(readdir(www_path),"line_chart") .&& contains.(readdir(www_path),"latent")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) Circles
(b) Linearly Separable
(c) Moons
(d) Overlapping
Figure 15 shows the evaluation metrics at the end of the experiments.
img_files = readdir(www_path)[contains.(readdir(www_path),"errorbar_chart") .&& contains.(readdir(www_path),"latent")]
img_files = joinpath.(www_path,img_files)
for img in img_files
display(load(img))
end(a) Circles
(b) Linearly Separable
(c) Moons
(d) Overlapping
n_bootstrap = 1000
using AlgorithmicRecourseDynamics.Evaluation: evaluate_system
using DataFrames
df = DataFrame()
for (key, val) in results
n_folds = length(val.experiment.recourse_systems)
for fold in 1:n_folds
for i in length(val.experiment.system_identifiers)
rec_sys = val.experiment.recourse_systems[fold][i]
model_name, gen_name = collect(val.experiment.system_identifiers)[i]
df_ = evaluate_system(rec_sys, val.experiment; n=n_bootstrap)
df_.model .= model_name
df_.generator .= gen_name
df_.fold .= fold
df = vcat(df, df_)
end
end
end
df = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df)
using RCall
save_path = joinpath(output_path, "bootstrap_latent.csv")
using CSV
CSV.write(save_path)Figure 16 shows the chart that went into the paper.
using DataFrames, Statistics
df = results[:overlapping].output
df = df[df.n .== maximum(df.n),:]
gdf = groupby(df, [:generator, :model, :n, :name, :scope])
df_plot = combine(gdf, :value => (x -> [(mean(x),mean(x)+std(x),mean(x)-std(x))]) => [:mean, :ymax, :ymin])
df_plot = df_plot[[name in [:mmd, :mmd_grid, :model_performance] for name in df_plot.name],:]
df_plot = df_plot[.!(df_plot.name.==:mmd .&& df_plot.scope.==:model),:]
df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot)
transform!(df_plot, :name => (X -> [x=="mmd" ? "MMD (domain)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="mmd_grid" ? "MMD (model)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="model_performance" ? "Performance" : x for x in X]) => :name)
transform!(df_plot, :generator => (X -> [x=="Latent" ? "Latent (γ=0.5)" : x for x in X]) => :generator)
transform!(df_plot, :generator => (X -> [x=="Latent_conservative" ? "Latent (γ=0.9)" : x for x in X]) => :generator)
transform!(df_plot, :model => (X -> [x=="FluxEnsemble" ? "Deep Ensemble" : x for x in X]) => :model)
transform!(df_plot, :model => (X -> [x=="FluxModel" ? "MLP" : x for x in X]) => :model)
transform!(df_plot, :model => (X -> [x=="LogisticRegression" ? "Linear" : x for x in X]) => :model)
ncol = length(unique(df_plot.model))
nrow = length(unique(df_plot.name))
using RCall
scale_ = 1.9
R"""
library(data.table)
df_plot <- data.table($df_plot)
model_order <- c("Linear", "MLP", "Deep Ensemble")
df_plot[,model:=factor(model, levels=model_order)]
library(ggplot2)
plt <- ggplot($df_plot) +
geom_bar(aes(x=n, y=mean, fill=generator), stat="identity", alpha=0.5, position="dodge") +
geom_pointrange( aes(x=n, y=mean, ymin=ymin, ymax=ymax, colour=generator), alpha=0.9, position=position_dodge(width=0.9), size=0.5) +
facet_grid(
rows = vars(name),
cols = vars(model),
scales = "free_y"
) +
labs(y = "Value") +
scale_fill_discrete(name="Generator:") +
scale_colour_discrete(name="Generator:") +
theme(
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="bottom"
) +
guides(fill=guide_legend(ncol=4))
temp_path <- file.path(tempdir(), "plot.png")
ggsave(temp_path,width=$ncol * $scale_,height=$nrow * $scale_ * 0.8)
"""
img = Images.load(rcopy(R"temp_path"))
Images.save(joinpath(www_path,"paper_synthetic_latent_results.png"), img)Images.load(joinpath(www_path,"paper_synthetic_latent_results.png"))generators = Dict(
:Generic=>GenericGenerator(decision_threshold=0.5),
:Latent=>REVISEGenerator(),
:Generic_conservative=>GenericGenerator(decision_threshold=0.9),
:Gravitational=>GravitationalGenerator(),
:ClapROAR=>ClapROARGenerator()
)max_obs = 2500
data_sets = AlgorithmicRecourseDynamics.Data.load_real_world(max_obs)using CounterfactualExplanations.DataPreprocessing: unpack
bs = 50
function data_loader(data::CounterfactualData)
X, y = unpack(data)
data = Flux.DataLoader((X,y),batchsize=bs)
return data
end
model_params = (batch_norm=false,n_hidden=32,n_layers=3,dropout=true,p_dropout=0.25)experiments = set_up_experiments(
data_sets,models,generators;
pre_train_models=100, model_params=model_params,
data_loader=data_loader
)n_evals = 5
n_rounds = 50
evaluate_every = Int(round(n_rounds/n_evals))
n_folds = 5
n_bootstrap = 1
n_samples = 10000
T = 250
using Serialization
results = run_experiments(
experiments;
save_path=output_path,
evaluate_every=evaluate_every,
n_rounds=n_rounds,
n_folds=n_folds,
n_bootstrap=n_bootstrap,
T=T
)
Serialization.serialize(joinpath(output_path,"results_real_world.jls"),results)using Serialization
results = Serialization.deserialize(joinpath(output_path,"results_real_world.jls"))using Images
line_charts = Dict()
errorbar_charts = Dict()
for (data_name, res) in results
plt = plot(res)
Images.save(joinpath(www_path, "line_chart_$(data_name).png"), plt)
line_charts[data_name] = plt
plt = plot(res,maximum(res.output.n))
Images.save(joinpath(www_path, "errorbar_chart_$(data_name).png"), plt)
errorbar_charts[data_name] = plt
endn_bootstrap = 1000
using AlgorithmicRecourseDynamics.Evaluation: evaluate_system
using DataFrames
df = DataFrame()
for (key, val) in results
n_folds = length(val.experiment.recourse_systems)
for fold in 1:n_folds
for i in length(val.experiment.system_identifiers)
rec_sys = val.experiment.recourse_systems[fold][i]
model_name, gen_name = collect(val.experiment.system_identifiers)[i]
df_ = evaluate_system(rec_sys, val.experiment; n=n_bootstrap)
df_.model .= model_name
df_.generator .= gen_name
df_.fold .= fold
df = vcat(df, df_)
end
end
end
df = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df)
using RCall
save_path = joinpath(output_path, "bootstrap_real_world.csv")
using CSV
CSV.write(save_path)Figure 16 shows the chart that went into the paper.
using DataFrames, Statistics
model_ = :FluxEnsemble
df = DataFrame()
for (key, val) in results
df_ = deepcopy(val.output)
df_.dataset .= key
df = vcat(df,df_)
end
df = df[df.n .== maximum(df.n),:]
df = df[df.model .== model_,:]
filter!(:value => x -> !any(f -> f(x), (ismissing, isnothing, isnan)), df)
gdf = groupby(df, [:generator, :dataset, :n, :name, :scope])
df_plot = combine(gdf, :value => (x -> [(mean(x),mean(x)+std(x),mean(x)-std(x))]) => [:mean, :ymax, :ymin])
df_plot = df_plot[[name in [:mmd, :model_performance] for name in df_plot.name],:]
df_plot = df_plot[.!(df_plot.name.==:mmd .&& df_plot.scope.!=:model),:]
df_plot = mapcols(x -> typeof(x) == Vector{Symbol} ? string.(x) : x, df_plot)
transform!(df_plot, :dataset => (X -> [x=="cal_housing" ? "California Housing" : x for x in X]) => :dataset)
transform!(df_plot, :dataset => (X -> [x=="credit_default" ? "Credit Default" : x for x in X]) => :dataset)
transform!(df_plot, :dataset => (X -> [x=="gmsc" ? "GMSC" : x for x in X]) => :dataset)
transform!(df_plot, :name => (X -> [x=="mmd" ? "MMD (model)" : x for x in X]) => :name)
transform!(df_plot, :name => (X -> [x=="model_performance" ? "Performance" : x for x in X]) => :name)
transform!(df_plot, :generator => (X -> [x=="Generic" ? "Generic (γ=0.5)" : x for x in X]) => :generator)
transform!(df_plot, :generator => (X -> [x=="Generic_conservative" ? "Generic (γ=0.9)" : x for x in X]) => :generator)
ncol = length(unique(df_plot.dataset))
nrow = length(unique(df_plot.name))
using RCall
scale_ = 2.0
R"""
library(ggplot2)
plt <- ggplot($df_plot) +
geom_bar(aes(x=n, y=mean, fill=generator), stat="identity", alpha=0.5, position="dodge") +
geom_pointrange( aes(x=n, y=mean, ymin=ymin, ymax=ymax, colour=generator), alpha=0.9, position=position_dodge(width=0.9), size=0.5) +
facet_grid(
rows = vars(name),
cols = vars(dataset),
scales = "free_y"
) +
labs(y = "Value") +
scale_fill_discrete(name="Generator:") +
scale_colour_discrete(name="Generator:") +
theme(
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="bottom"
) +
guides(fill=guide_legend(ncol=3))
temp_path <- file.path(tempdir(), "plot.png")
ggsave(temp_path,width=$ncol * $scale_,height=$nrow * $scale_ * 0.85)
"""
img = Images.load(rcopy(R"temp_path"))
Images.save(joinpath(www_path,"paper_real_world_results.png"), img)Images.load(joinpath(www_path,"paper_real_world_results.png"))using Pkg; Pkg.activate("dev")include("dev/utils.jl")
using AlgorithmicRecourseDynamics
using CounterfactualExplanations, Flux, Plots, PlotThemes, Random, LaplaceRedux, LinearAlgebra
theme(:wong)
output_path = output_dir("generator")
www_path = www_dir("generator")using MLJ
N = 1000
X, ys = make_blobs(N, 2; centers=2, as_table=false, center_box=(-5 => 5), cluster_std=0.5)
ys .= ys.==2
X = X'
xs = Flux.unstack(X,2)
data = zip(xs,ys)
counterfactual_data = CounterfactualData(X,ys')using Flux
nn = Chain(Dense(2,1))
using Flux.Optimise: update!, ADAM
opt = ADAM()
epochs = 100
loss(x, y) = Flux.Losses.logitbinarycrossentropy(nn(x), y)
avg_loss(data) = mean(map(d -> loss(d[1],d[2]), data))
show_every = epochs/10
for epoch = 1:epochs
for d in data
gs = gradient(Flux.params(nn)) do
l = loss(d...)
end
update!(opt, Flux.params(nn), gs)
end
if epoch % show_every == 0
println("Epoch " * string(epoch))
@show avg_loss(data)
end
endM = FluxModel(nn)x = select_factual(counterfactual_data, rand(1:size(X)[2]))
y = round(probs(M, x)[1])
target = ifelse(y==1.0,0.0,1.0) # opposite label as target
T = 100Λ₂ = [0.1, 1, 5]
counterfactuals_strict = []
generators = []
for λ₂ ∈ Λ₂
λ = [0.1, λ₂]
generator = GravitationalGenerator(λ=λ)
generators = vcat(generators..., generator)
counterfactuals_strict = vcat(
counterfactuals_strict...,
generate_counterfactual(x, target, counterfactual_data, M, generator; convergence=:strict, T=T)
)
endtheme(:wong)
T_ = 500
plts = []
for i ∈ 1:length(Λ₂)
λ₂ = Λ₂[i]
counterfactual = counterfactuals_strict[i]
plt = plot(counterfactual, plot_up_to=minimum([T,T_]), title="λ₂=$(λ₂)")
x1,x2 = generators[i].centroid[1], generators[i].centroid[2]
scatter!(plt, [x1], [x2], colour=:purple, label="attractor")
plts = vcat(plts..., plt)
end
plt = plot(plts..., size=(1200,300), layout=(1,3))
savefig(plt, joinpath(www_path,"gravitational_generator_strict.png"))Λ₂ = [0.1, 1, 5]
counterfactuals = []
generators = []
for λ₂ ∈ Λ₂
λ = [0.1, λ₂]
generator = GravitationalGenerator(λ=λ)
generators = vcat(generators..., generator)
counterfactuals = vcat(
counterfactuals...,
generate_counterfactual(x, target, counterfactual_data, M, generator)
)
endtheme(:wong)
T_ = 500
plts = []
for i ∈ 1:length(Λ₂)
λ₂ = Λ₂[i]
counterfactual = counterfactuals[i]
plt = plot(counterfactual, plot_up_to=minimum([T,T_]), title="λ₂=$(λ₂)")
x1,x2 = generators[i].centroid[1], generators[i].centroid[2]
scatter!(plt, [x1], [x2], colour=:purple, label="attractor")
plts = vcat(plts..., plt)
end
plt = plot(plts..., size=(1400,300), layout=(1,3))
savefig(plt, joinpath(www_path,"gravitational_generator.png"))idx = 1
x1,x2 = generators[idx].centroid[1], generators[idx].centroid[2]
plt1 = plot(counterfactuals[idx], plot_up_to=minimum([T,T_]), title="Simple")
scatter!(plt1, [x1], [x2], colour=:purple, label="attractor")
plt2 = plot(counterfactuals_strict[idx], plot_up_to=minimum([T,T_]), title="Strict")
scatter!(plt2, [x1], [x2], colour=:purple, label="attractor")
plt = plot(plt1, plt2, size=(850,350), layout=(1,2))
savefig(plt, joinpath(www_path,"gravitational_generator_comparison.png"))using Pkg; Pkg.activate("dev")include("dev/utils.jl")
using AlgorithmicRecourseDynamics
using CounterfactualExplanations, Flux, Plots, PlotThemes, Random, LaplaceRedux, LinearAlgebra
theme(:wong)
output_path = output_dir("generator")
www_path = www_dir("generator")using MLJ
N = 1000
X, ys = make_blobs(N, 2; centers=2, as_table=false, center_box=(-5 => 5), cluster_std=0.1)
ys .= ys.==2
X = X'
xs = Flux.unstack(X,2)
data = zip(xs,ys)
counterfactual_data = CounterfactualData(X,ys')using Flux
nn = Chain(Dense(2,1))
using Flux.Optimise: update!, ADAM
opt = ADAM()
epochs = 100
loss(x, y) = Flux.Losses.logitbinarycrossentropy(nn(x), y)
avg_loss(data) = mean(map(d -> loss(d[1],d[2]), data))
show_every = epochs/10
grad_norms = []
using LinearAlgebra
for epoch = 1:epochs
grads_ = []
for d in data
gs = gradient(Flux.params(nn)) do
l = loss(d...)
end
update!(opt, Flux.params(nn), gs)
grads_ = vcat(grads_..., gs)
end
if epoch % show_every == 0
println("Epoch " * string(epoch))
@show avg_loss(data)
end
grad_norms = vcat(grad_norms..., norm(grads_))
endM = FluxModel(nn)x = select_factual(counterfactual_data, rand(1:size(X)[2]))
y = round(probs(M, x)[1])
target = ifelse(y==1.0,0.0,1.0) # opposite label as target
T = 100generator = GenericGenerator()
ce = generate_counterfactual(x, target,counterfactual_data, M, generator)x_ = path(ce)
y_ = CounterfactualExplanations.Counterfactuals.counterfactual_label_path(ce)
data_ = zip(x_,y_)
grad_norms = []
loss_ = []
for d in data_
gs = gradient(Flux.params(nn)) do
loss(d...)
end
loss_ = vcat(loss_...,loss(d...))
grad_norms = vcat(grad_norms..., norm(gs))
endΛ₂ = [0.1, 1, 5]
counterfactuals_strict = []
generators = []
for λ₂ ∈ Λ₂
λ = [0.1, λ₂]
generator = ClapROARGenerator(λ=λ)
generators = vcat(generators..., generator)
counterfactuals_strict = vcat(
counterfactuals_strict...,
generate_counterfactual(x, target, counterfactual_data, M, generator; T=T)
)
endtheme(:wong)
T_ = 500
plts = []
for i ∈ 1:length(Λ₂)
λ₂ = Λ₂[i]
counterfactual = counterfactuals_strict[i]
plt = plot(counterfactual, plot_up_to=minimum([T,T_]), title="λ₂=$(λ₂)")
plts = vcat(plts..., plt)
end
plt = plot(plts..., size=(1200,300), layout=(1,3))
# savefig(plt, joinpath(www_path,"endo_roar_generator.png"))generators = Dict(
:Generic => GenericGenerator(),
:ROAR => ClapROARGenerator()
)
counterfactuals = Dict([name => generate_counterfactual(x, target, counterfactual_data, M, gen; T=T) for (name, gen) in generators])plts = []
for (name,ce) ∈ counterfactuals
plt = plot(ce, plot_up_to=minimum([T,T_]), title=name)
plts = vcat(plts..., plt)
end
plt = plot(plts..., size=(800,300), layout=(1,2))
display(plt)